##########################################################
# Project:    Talking to the Populist Radical Right
# Task:       The script counts the mentions of parties in 
#             speeches in the Swedish parliament and
#             annotates the data set
# Author:     Jan Schwalbach (21/07/2022)
##########################################################

# Loading packages and data

library(quanteda)
library(ggrepel)
library(grid)
library(zoo)
library(pscl)
library(aod)
library(ggplot2)

load(file = "Corpus_Sweden.Rdata")

# Deleting parties that are not analysed and the speaker

# Drop rows of type "0" and rows whose party is "-" or missing. which()
# discards rows for which the comparison evaluates to NA.
corpusSW <- corpusSW[which(corpusSW$type != "0" &
                             corpusSW$party != "-" &
                             !is.na(corpusSW$party)), ]

# Recode the party label "FP" as "L". partyyear is built from the recoded
# party, so only partyquarter needs to be recoded separately.
corpusSW$party <- gsub("FP", "L", corpusSW$party)
corpusSW$partyquarter <- gsub("FP", "L", corpusSW$partyquarter)
corpusSW$partyyear <- paste(corpusSW$party, corpusSW$year)

# Remove NYD and keep the speeches held from 2010-09-19 up to, but not
# including, 2018-09-09.
corpusSW <- corpusSW[corpusSW$party != "NYD" &
                       corpusSW$sessiondate >= "2010-09-19" &
                       corpusSW$sessiondate < "2018-09-09", ]

# Annotating speeches for topics

# Immigration: coded 1 when migpres1 is "TRUE" or the migration count is
# larger than 2; a migration count of 0 always resets the code to 0.
corpusSW$immigration <- 0
corpusSW$immigration[corpusSW$migpres1 == "TRUE" |
                       corpusSW$migrationcount > 2] <- 1
corpusSW$immigration[corpusSW$migrationcount == 0] <- 0

# Education: same rule, based on educationpres1 and educationcount.
corpusSW$education <- 0
corpusSW$education[corpusSW$educationpres1 == "TRUE" |
                     corpusSW$educationcount > 2] <- 1
corpusSW$education[corpusSW$educationcount == 0] <- 0

# Loading the dictionaries and counting for each party
#
# For every party code a semicolon-separated file "<code>.csv" is read. Its
# column named after the party code holds the dictionary terms. For each party
# two columns are added to corpusSW:
#   <code>count  number of dictionary matches in the speech text
#   <code>pres   TRUE if the speech contains at least one match
#
# The stringr functions are called with their namespace because stringr is
# not attached by any library() call of this script.

# Reads the dictionary of one party and returns a single regular expression
# that matches any non-empty term surrounded by one space on each side.
party_mention_pattern <- function(party_code) {
  dictionary <- read.csv(
    file = paste0(party_code, ".csv"),
    fileEncoding = "WINDOWS-1252",
    stringsAsFactors = FALSE,
    header = TRUE,
    sep = ";"
  )
  # exact = FALSE keeps the partial name matching that `$` would perform
  terms <- stringr::str_trim(dictionary[[party_code, exact = FALSE]],
                             side = "both")
  paste0(" ", paste(terms[terms != ""], collapse = " | "), " ")
}

for (party_code in c("C", "FP", "KD", "M", "MP", "S", "SD", "V")) {
  mention_count <- stringr::str_count(corpusSW$text,
                                      party_mention_pattern(party_code))
  corpusSW[[paste0(party_code, "count")]] <- mention_count
  corpusSW[[paste0(party_code, "pres")]] <- mention_count > 0
}

# Sums one per-speech count column of corpusSW within each speaker party.
# Returns a data frame with the grouping column "Category" and the summed
# counts in a column named `label`.
sum_mentions_by_party <- function(count_column, label) {
  totals <- aggregate(corpusSW[[count_column]],
                      by = list(Category = corpusSW$party),
                      FUN = sum)
  names(totals)[2] <- label
  totals
}

# Only the first table keeps the Category column; the others are reduced to
# their count column. Mentions found with the FP dictionary are labelled "L".
Ccount <- sum_mentions_by_party("Ccount", "C")
FPcount <- sum_mentions_by_party("FPcount", "L")["L"]
KDcount <- sum_mentions_by_party("KDcount", "KD")["KD"]
Mcount <- sum_mentions_by_party("Mcount", "M")["M"]
MPcount <- sum_mentions_by_party("MPcount", "MP")["MP"]
Scount <- sum_mentions_by_party("Scount", "S")["S"]
SDcount <- sum_mentions_by_party("SDcount", "SD")["SD"]
Vcount <- sum_mentions_by_party("Vcount", "V")["V"]

# Combining all party counts
#
# Each row of partycounts is a speaker party (column "party"); the columns
# C, L, KD, M, MP, S, SD and V hold how often that party mentioned the
# respective party.

partycounts <- cbind(Ccount, FPcount, KDcount, Mcount, MPcount, Scount,
                     SDcount, Vcount)
partycounts$party <- partycounts$Category

# A party mentioning itself is not counted: set the cell where the speaker
# party equals the mentioned party to 0. The loop covers every party, so the
# self-mentions of L are removed from column L (and L's mentions of C are
# left untouched).
party_columns <- c("C", "L", "KD", "M", "MP", "S", "SD", "V")
for (own_party in party_columns) {
  partycounts[[own_party]][partycounts$party == own_party] <- 0
}

# Share of SD mentions among all mentions of other parties
partycounts$SD_all <- partycounts$SD / rowSums(partycounts[party_columns])

partycounts$Category <- NULL

# Counting the shares for all/left/right parties

# SD's own row is excluded from every group
countsall <- partycounts[!(partycounts$party %in% "SD"), ]

# Left group: all remaining parties except KD, C, M and L
countsleft <- countsall[!(countsall$party %in% c("KD", "C", "M", "L")), ]

# Right group: all remaining parties except S, V and MP
countsright <- countsall[!(countsall$party %in% c("S", "V", "MP")), ]

# Mean share of SD mentions per group
mean(countsleft$SD_all)
mean(countsright$SD_all)
mean(countsall$SD_all)

# Preparing and annotating the data set for the logit regression

# Speeches held by SD are removed
corpusSW <- corpusSW[corpusSW$party != "SD", ]

# Left parties: 1 for S, MP and V, otherwise 0
corpusSW$left <- as.numeric(corpusSW$party %in% c("S", "MP", "V"))

# Mainstream parties: 1 for S and M, otherwise 0
corpusSW$mainstream <- as.numeric(corpusSW$party %in% c("S", "M"))

# RWdummy: the SD mention count, capped at 1
corpusSW$RWdummy <- replace(corpusSW$SDcount, corpusSW$SDcount >= 1, 1)

# Term

corpusSW$country <- "Sweden"

# Term 1 is the default; speeches held from 2014-09-14 up to, but not
# including, 2018-09-09 belong to term 2
corpusSW$term <- 1
corpusSW$term[which(corpusSW$sessiondate >= "2014-09-14" &
                      corpusSW$sessiondate < "2018-09-09")] <- 2

# Government Type/party and right-wing populist size

corpusSW$minority <- 1

# term is either 1 or 2, so it can be used to look up the value per term
corpusSW$RRP_size <- c(5.7, 14)[corpusSW$term]

corpusSW$support <- 0

# M, KD, L and C are coded as government in term 1, S and MP in term 2;
# every other party-term combination is coded as opposition
corpusSW$Government <- ifelse(
  (corpusSW$term == 1 & corpusSW$party %in% c("M", "KD", "L", "C")) |
    (corpusSW$term == 2 & corpusSW$party %in% c("S", "MP")),
  "Government",
  "Opposition"
)

# Keep the variables of the final data set and store it on disk
Sweden <- corpusSW[, c("RWdummy", "immigration", "education", "left",
                       "mainstream", "type", "term", "minority", "RRP_size",
                       "support", "Government", "country")]
save(Sweden, file = "Sweden_party_mentions.Rdata")